/******************************************************************************
* build bank covariates from FDIC 
*****************************************************************************/


clear all
set more off
global proj "/Users/hanyang/tu2"
global dataRoot "${proj}/Data"


*Data sources:
*	FDIC https://www5.fdic.gov/sdi/download_large_list_outside.asp
* 			documentation: https://www5.fdic.gov/sdi/sitemap.asp

*** load FDIC 2014 banks asset/liability
import delimited "$dataRoot/Raw/All_Reports_20140630_Assets and Liabilities.csv", clear
keep cert zip depdom asset liab

compress
save "$dataRoot/Clean/FDIC_bank_asset_2014", replace



*** load FDIC 2014 bank branch level data
import delimited "$dataRoot/Raw/FDIC_2014.csv", clear
keep depsumbr sims_established_date bkclass cert namefull uninumbr citybr stcntybr stalpbr year
destring depsumbr, ignore(",") replace
	
* national bank 
gen national=(bkclass=="N")	

*** merge bank level performance data
merge m:1 cert using "$dataRoot/Clean/FDIC_bank_asset_2014"
keep if _merge==3
drop _merge
	
	
*cross walk county to cz
rename stcntybr county_id
merge m:1 county_id using "$dataRoot/Crosswalk/cw_cty00_cz.dta"
keep if _merge==3	
drop _merge
	
*** merge with county population and square mile
rename county_id county
merge m:1 county using "$dataRoot/Crosswalk/county_population_area.dta"
keep if _merge==3
	
	
*** collapse at cz level
collapse (count) numbr=uninumbr (sum) landsqmi (mean) national , by(year cz czname state_id stateabbrv) fast

*share of national banks
replace national=national*100
gen numbr_sqmi = 1000*numbr/landsqmi
	replace numbr_sqmi = log(numbr_sqmi+1)
	
	
	
*label variables
label var landsqmi    "Area Square Mile"
label var numbr 	  "ln num branches"
label var national    "share of natioanl banks by branch"
label var numbr_sqmi  "ln branch/k sq mile"
	
keep numbr numbr_sqmi national year cz* state*

compress
save "$dataRoot/Clean/TU_Geo_Debt_Banking_Access_2014", replace


